In [1]:
import pandas

In [2]:
# Load the source CSV (path is relative to the working directory) into a DataFrame.
df = pandas.read_csv(filepath_or_buffer="./Cluster-Crime-Janeiro.csv")

In [3]:
# Keep only the rows whose CLUSTER value equals 1.
# TODO: use a for loop to go through all regions instead of a single hard-coded one.
novo = df.loc[df['CLUSTER'].eq(1)]

In [4]:
# One-column DataFrame (not a Series) holding just the HORA column of the subset.
crime_hours = novo.loc[:, ['HORA']]

In [5]:
# Number of rows for each distinct HORA value, as a Series indexed by HORA.
crime_hours_total = (
    crime_hours
    .groupby(by='HORA')
    .size()
)

In [6]:
# Start from an empty frame indexed by the distinct HORA values; the TOTAL
# column is attached in a later step. The frame is built directly from the
# index of the per-HORA counts rather than by summing a group-by that has no
# value columns, which produced the same empty frame in a roundabout way.
crime_hours_counts = pandas.DataFrame(index=crime_hours_total.index)

In [7]:
# Attach the per-HORA row counts as the TOTAL column (values align on the HORA index).
crime_hours_counts = crime_hours_counts.assign(TOTAL=crime_hours_total)

In [8]:
# Order the rows from the highest TOTAL to the lowest.
all_hours_types = crime_hours_counts.sort_values(
    'TOTAL',
    ascending=False,
)

In [9]:
# Bare expression: renders the sorted frame as this cell's output.
all_hours_types


Out[9]:
TOTAL
HORA
17 4
13 3
11 2
14 2
18 2
02 1
05 1
06 1
08 1
09 1
10 1
15 1
19 1
20 1

In [10]:
import matplotlib.pyplot as plt
import numpy as np
import matplotlib.ticker as ticker

In [15]:
# Horizontal bar chart of all_hours_types, which covers the cluster-1 subset only.
# The title names the cluster so this figure cannot be mistaken for the
# whole-month chart, which otherwise carries an identical title.
ax = all_hours_types.plot(kind='barh', figsize=(10,8), color='#cc0000')
ax.set_title('Crimes by Hour - Cluster 1 (Jan 2017)')
ax.set_xlabel('Número de ocorrências')
ax.set_ylabel('Hora do dia')
plt.tight_layout()
# Show the counts on the x axis as whole numbers with thousands separators.
ax.xaxis.set_major_formatter(ticker.StrMethodFormatter('{x:,.0f}'))
plt.show()


Os minutos fazem com que os horários dos crimes fiquem distintos entre si.


In [18]:
# Occurrences of each distinct HORA value across the whole data set (not just one cluster).
horas_mes = df['HORA'].value_counts()

In [19]:
# Horizontal bar chart of the per-HORA counts for the full data set.
ax = horas_mes.plot.barh(figsize=(10,8), color='#cc0000')
ax.set(
    title='Crimes by Hour (Jan 2017)',
    xlabel='Número de ocorrências',
    ylabel='Hora do dia',
)
plt.tight_layout()
# Show the counts on the x axis as whole numbers with thousands separators.
ax.xaxis.set_major_formatter(ticker.StrMethodFormatter('{x:,.0f}'))
plt.show()



In [ ]: